import json
import openai
from tqdm import tqdm
import pandas as pd
import argparse
import os
import sys
from typing import Dict, List, Optional
from openai import AzureOpenAI
import re

def parse_args() -> argparse.Namespace:
    """Build the command-line parser and return the parsed options.

    Recognised options:
        --start       int, default 0; first record index of the slice (inclusive).
        --end         int, default None; last record index of the slice (inclusive).
        --output_dir  str, default "persuasion_results"; where JSON results go.
        --csv_path    str, required; input CSV with columns video_id,story.

    Returns:
        The populated ``argparse.Namespace`` parsed from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        description="Run zero-shot persuasion-strategy classification over a slice of the dataset.",
    )

    # One (flag, keyword-arguments) pair per option, registered in order below.
    option_table = (
        ("--start", {"type": int, "default": 0, "help": "Start index (inclusive) of the slice."}),
        ("--end", {"type": int, "default": None, "help": "End index (inclusive) of the slice."}),
        (
            "--output_dir",
            {"type": str, "default": "persuasion_results", "help": "Directory to write JSON results."},
        ),
        (
            "--csv_path",
            {"type": str, "required": True, "help": "Path to the input CSV with columns video_id,story"},
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args()

# ---------------------------------------------------------------------------
# Zero-shot classification system prompt
# ---------------------------------------------------------------------------
# System message for the chat request: one sentence per list entry, joined
# with single spaces into the final instruction text.
SYSTEM_PROMPT = " ".join(
    [
        "You will be given a dictionary called persuasion_vocab that lists persuasion strategies with their definitions.",
        "You will also be given the STORY text of a video advertisement.",
        "Your task is to choose the SINGLE most relevant persuasion strategy key from persuasion_vocab "
        "that is most central to how the advertisement seeks to persuade viewers.",
        "Output ONLY the strategy key, nothing else.",
    ]
)

# Emotion vocabulary
# topics = "Emotion_vocab = {'active': 'active(energetic, adventurous, vibrant, enthusiastic, playful)', 'afraid': 'afraid(horrified, scared, fearful)', 'alarmed': 'alarmed(concerned, worried, anxious, overwhelmed)', 'alert': 'alert(attentive, curious)', 'amazed': 'amazed(surprised, astonished, awed, fascinated, intrigued)', 'amused': 'amused(humored, laughing)', 'angry': 'angry(annoyed, irritated)', 'calm': 'calm(soothed, peaceful, comforted, fullfilled, cozy)', 'cheerful': 'cheerful(delighted, happy, joyful, carefree, optimistic)', 'confident': 'confident(assured, strong, healthy)', 'conscious': 'conscious(aware, thoughtful, prepared)', 'creative': 'creative(inventive, productive)', 'disturbed': 'disturbed(disgusted, shocked)', 'eager': 'eager(hungry, thirsty, passionate)', 'educated': 'educated(informed, enlightened, smart, savvy, intelligent)', 'emotional': 'emotional(vulnerable, moved, nostalgic, reminiscent)', 'empathetic': 'empathetic(sympathetic, supportive, understanding, receptive)', 'fashionable': 'fashionable(trendy, elegant, beautiful, attractive, sexy)', 'feminine': 'feminine(womanly, girlish)', 'grateful': 'grateful(thankful)', 'inspired': 'inspired(motivated, ambitious, empowered, determined)', 'jealous': 'jealous', 'loving': 'loving(loved, romantic)', 'manly': 'manly', 'persuaded': 'persuaded(impressed, enchanted, immersed)', 'pessimistic': 'pessimistic(skeptical)', 'proud': 'proud(patriotic)', 'sad': 'sad(depressed, upset, betrayed, distant)', 'thrifty': 'thrifty(frugal)', 'youthful': 'youthful(childlike)'}"

# Persuasion-strategy vocabulary sent to the model in the user message
# (the variable keeps its historical name `topics`).
#
# A single- or double-quoted literal cannot span physical lines, so the text is
# assembled from adjacent literals, one vocabulary entry per literal. The
# explicit "\n" keeps the line break that sits inside the
# 'Anchoring and Comparison' definition. The wording is reproduced verbatim
# because it is part of the prompt.
topics = (
    "Persuasion Strategies Vocabulary: { "
    "'Authority':'Authority indicated through expertise, source of power, third-party approval, credentials, and awards',"
    "'Social Identity':'Normative influence, which involves conformity with the positive expectations of 'another', who could be another person, a group, or ones self.using the idea of 'everyone else is doing it' to influence people's behavior.', "
    "'Social Proof':'efers to the use of testimonials, reviews, or other forms of social validation to demonstrate the popularity, trustworthiness, or quality of a product or brand. By leveraging social proof, advertisements can increase consumers' confidence and trust in the product or brand, and encourage them to make a purchase.',"
    "'Reciprocity':'By obligating the recipient of an act to repayment in the future, the rule for reciprocation begets a sense of future obligation, often unequal in nature',"
    "'Foot in the door':'Starting with small requests followed by larger requests to facilitate compliance while maintaining cognitive coherence.',"
    "'Overcoming Reactance':'Overcoming resistance (reactance) by postponing consequences to the future, by focusing resistance on realistic concerns, by forewarning that a message will be coming, by acknowledging resistance, by raising self-esteem and a sense of efficacy.',"
    "'Concreteness':'concreteness refers to the use of specific, tangible details or examples to make an abstract or complex concept more concrete and relatable to consumers. By using concrete language and imagery, advertisements can increase consumers' understanding and engagement with the product or brand, and create a more vivid and memorable impression.',"
    "'Anchoring and Comparison':'anchoring refers to the use of a reference point or starting point to influence consumers' perceptions of value or price. Comparison refers to the use of side-by-side or direct comparisons to demonstrate the superiority of a product or brand over competitors. \n"
    "Both anchoring and comparison are common persuasion strategies used in advertising to influence consumer decision-making.',"
    "'Social Impact':'Refers to the positive effect that an advertisement has on society or the broader community. This can include promoting social causes, raising awareness about important issues, or encouraging positive behaviors and attitudes.',"
    "'Scarcity':'People assign more value to opportunities when they are less available. This happens due to psychological reactance of losing freedom of choice when things are less available or they use availability as a cognitive shortcut for gauging quality.',"
    "'Unclear':'If the strategy used in the advertisement is unclear or it is not in English or no strategy is used as the central message of the advertisement'}"
)
def _cell_to_text(value: object) -> str:
    """Return a CSV cell as text, mapping missing values (None/NaN/NA) to ''."""
    # pandas represents empty cells as NaN; str(NaN) would yield the word "nan".
    if value is None or pd.isna(value):
        return ""
    return str(value)


def _predict_strategy(client: "AzureOpenAI", video_id: str, story: str) -> str:
    """Ask the model for one strategy key; return "error_api" if the call fails."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{topics}\n\nStory: {story}"},
    ]
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=20,
            temperature=0.0,
            n=1,
        )
        content = response.choices[0].message.content
        if content is None:
            raise ValueError("response contained no message content")
        # Normalise the answer: lower-case, without surrounding quotes/punctuation.
        return content.strip().lower().strip("'\". ,")
    except Exception as e:  # API boundary: one failed request must not abort the run
        print(f"Error during OpenAI call for key {video_id}: {e}")
        return "error_api"


def _save_results(results: List[Dict[str, str]], output_path: str) -> None:
    """Write *results* as JSON to *output_path* without ever leaving it half-written."""
    # Write to a sibling temp file, then swap it in, so an interrupted write
    # cannot destroy the previously saved progress.
    tmp_path = f"{output_path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)
    os.replace(tmp_path, output_path)


def main() -> None:
    """Classify the persuasion strategy of each story in a slice of the input CSV.

    Reads the CLI options, loads the CSV, sends every story in rows
    ``start``..``end`` (both inclusive) to the chat model and writes the
    predictions to ``<output_dir>/persuasion_results_<start>_<end>.json``.
    The file is rewritten after every record so an interrupted run keeps its
    progress. Records whose API call fails get ``predicted_topic == "error_api"``.

    Raises:
        SystemExit: with code 1 when the CSV cannot be read or the final
            results file cannot be written.
    """
    args = parse_args()
    os.makedirs(args.output_dir, exist_ok=True)

    # Setup Azure OpenAI client (credentials come from the environment,
    # falling back to placeholder values).
    client = AzureOpenAI(
        api_key=os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
        api_version="2024-02-15-preview",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", "https://your-azure-openai-endpoint/"),
    )

    # Load CSV data
    try:
        df = pd.read_csv(args.csv_path)
    except Exception as e:
        print(f"Error reading CSV {args.csv_path}: {e}")
        sys.exit(1)

    all_records = df.to_dict(orient='records')

    # Determine slice for this run; --end is inclusive and clamped to the last row.
    start_idx = args.start
    end_idx = len(all_records) - 1 if args.end is None else min(args.end, len(all_records) - 1)
    slice_records = all_records[start_idx : end_idx + 1]

    print(f"Processing slice {start_idx}–{end_idx} (n={len(slice_records)})")

    results: List[Dict[str, str]] = []
    output_path = os.path.join(args.output_dir, f"persuasion_results_{start_idx}_{end_idx}.json")

    for rec in tqdm(slice_records, desc=f"Persuasion Eval {start_idx}-{end_idx}"):
        video_id = _cell_to_text(rec.get('video_id')).strip()
        # split()/join collapses every whitespace run (including newlines and
        # form feeds) into a single space.
        cleaned_text = ' '.join(_cell_to_text(rec.get('story')).split())

        pred_topic = _predict_strategy(client, video_id, cleaned_text)

        results.append({
            'video_id': video_id,
            'url': f"https://www.youtube.com/watch?v={video_id}" if video_id else "",
            'story': cleaned_text,
            'predicted_topic': pred_topic,
        })

        # Incremental save; a failed write is reported but does not stop the
        # run, and is retried on the next record and once more after the loop.
        try:
            _save_results(results, output_path)
        except OSError as e:
            print(f"Error processing key {video_id}: {e}")

    # Final save: guarantees the file exists (even for an empty slice) and holds
    # every result before success is reported.
    try:
        _save_results(results, output_path)
    except OSError as e:
        print(f"Error writing results to {output_path}: {e}")
        sys.exit(1)

    print(f"Finished processing. Results saved to {output_path}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()




